#include <cstdlib>
#include <iostream>
#include "string.h"
#include "fstream"

/*
Implementazione by Ugo Cirmignani
Analista programmatore (Roma)

Il sorgente puù essere compilato con Dev-cpp (windows), GCC (unix) e altri.

--Last update 1/1/2012
-Fixed a reset dictionary issue

*/

//262143 == 20 bit == 1gb circa
//#define dictionary_size 1000000

//262143 == 19 bit == 500 MB circa
//#define dictionary_size 524287

//262143 == 18 bit == 255 MB circa
//#define dictionary_size 262143

//17 bit
//#define dictionary_size 131071

//16 bit
#define dictionary_size 65535

//14 bit
//#define dictionary_size 16383


//9 bit
//#define dictionary_size 5000 

#define reset_dictionary 1 //Reset the dictionary if it become too big
#define bit_writing 1 //Use the bit writing mode, improve the data compression


using namespace std;


struct lzwn {
//The byte value!
unsigned char value;
//The index/prefix value
unsigned int index;
//The value code === node index
unsigned int code;
//Pointer
struct lzwn *p[256];
};

struct lzwn dictionary[dictionary_size+100];

//The writer buffer!
unsigned char write_buffer[5] = {0};
int wb_index;
int wb_rest;
int wb_bit_size;
int wb_processed;
int wb_operation_minus;
int wb_old_bit_size;
unsigned char given_rest_char;
int operation = 0;

char reversing_buffer[dictionary_size+1000];
int rb_index;

unsigned int full_after;

unsigned int after_full_after_successes;

unsigned long next_code = 0;
unsigned long primo, secondo, vecchio, givecode;

void dic_reset();
void dic_init();
int GetBitFlag( unsigned char, int);
unsigned char SetBitFlag(unsigned char, int, int);

unsigned int get_end_bit()
{
return ((1 << (wb_bit_size)) - 1);
}

void print_buffered(unsigned int value)
{

if (wb_rest != 0)
{
write_buffer[0] = (write_buffer[wb_index-1]) << 8-wb_rest;
write_buffer[0] = write_buffer[wb_index];
}
wb_index = 0;

for ( int i = wb_bit_size; i > 0; i-- )
{
unsigned char ilbit = ( ( ( value<<(32 - i) ) >> (31) ) );
write_buffer[wb_index] = SetBitFlag(write_buffer[wb_index], 7 - wb_rest, (int)ilbit );
wb_rest++;
if (wb_rest == 8 )
{
wb_index++;
wb_rest = 0;
}
}
}


int give_code(unsigned char ingresso)
{
if ( wb_bit_size == 8 )
{
givecode = ingresso;
given_rest_char = 0;
wb_processed = 0;
wb_rest =0;
return ingresso;
}
else
{
int bit_to_process = wb_bit_size - wb_processed;
if ( bit_to_process == wb_bit_size )
{
givecode = 0;
}

if (wb_rest > 0)
{
if ( (bit_to_process - wb_rest) >= 0 )
{
givecode += (unsigned int)( given_rest_char << (bit_to_process - wb_rest)) ;
wb_processed = wb_processed + wb_rest;
bit_to_process = bit_to_process - wb_rest;
wb_rest = 0;
if ( bit_to_process == 0 )
{
wb_processed = 0;
return givecode;
}
}
else
{
;
}
}


if ( (bit_to_process - 8) >= 0 )
{
givecode += (unsigned int)( ingresso <<  bit_to_process - 8);
wb_rest = 0;
wb_processed = wb_processed + 8;
bit_to_process = bit_to_process -8;

if ( bit_to_process == 0 )
{

wb_processed = 0;
return givecode;
}
else
{
return -1;
}
}
else
{
wb_rest = 8-bit_to_process;
given_rest_char = ( ( ingresso<<(bit_to_process) ) );
given_rest_char = given_rest_char >> bit_to_process;
givecode += (unsigned int)( ingresso>>(8-bit_to_process) );
wb_processed = 0;
return givecode;
}


}

return -1;
}


//Get bit 0-7 flag return 1 or 0, if any error return -1
int GetBitFlag( unsigned char MyByte, int BitN )
{
unsigned char B;

switch (BitN)
{
case 0:
B =(MyByte & 0x01);
return B;
break;

case 1:
B = (MyByte & 0x02);
B = (B >> 1);
return B;
break;

case 2:
B = (MyByte & 0x04);
B = (B >> 2);
return B;
break;

case 3:
B = (MyByte & 0x08);
B = (B >> 3);
return B;
break;

case 4:
B = (MyByte & 0x10);
B = (B >> 4);
return B;
break;

case 5:
B = (MyByte & 0x20);
B = (B >> 5);
return B;
break;

case 6:
B = (MyByte & 0x40);
B = (B >> 6);
return B;
break;

case 7:
B = (MyByte & 0x80);
B = (B >> 7);
return B;
break;

default:
return -1;
break;

}

return -1;
}

unsigned char SetBitFlag(unsigned char MyByte, int BitN, int value)
{
if (((BitN < 0) && (BitN > 7)) || ((value < 0) && (value > 1)))
return MyByte;


switch (BitN)
{
case 0:
if (value == 0)
{

if (GetBitFlag(MyByte, 0) == 0)
return MyByte; 
else 
{
MyByte = ( MyByte ^ 0x01); 
return MyByte;
}
}
if (value == 1)

if (GetBitFlag(MyByte, 0) == 1)
return MyByte; 
else 
{
MyByte = (MyByte ^ 0x01);
return MyByte;
}
break;

case 1:
if (value == 0)
{
if (GetBitFlag(MyByte, 1) == 0)
return MyByte;
else 
{
MyByte = (MyByte ^ 0x02); 
return MyByte;
}
}
if (value == 1)
if (GetBitFlag(MyByte, 1) == 1)
return MyByte; 
else 
{
MyByte = (MyByte ^ 0x02);
return MyByte;
}
break;

case 2:
if (value == 0)
{
if (GetBitFlag(MyByte, 2) == 0)
return MyByte; 
else
{
MyByte = (MyByte ^ 0x04);
return MyByte;
}
}
if (value == 1)
if (GetBitFlag(MyByte, 2) == 1)
return MyByte; 
else 
{
MyByte = (MyByte ^ 0x04);
return MyByte;
}
break;

case 3:
if (value == 0)
{

if (GetBitFlag(MyByte, 3) == 0)
return MyByte; 
else 
{
MyByte = (MyByte ^ 0x08);
return MyByte;
}
}
if (value == 1)
if (GetBitFlag(MyByte, 3) == 1)
return MyByte;
else 
{
MyByte = (MyByte ^ 0x08);
return MyByte;
}
break;

case 4:
if (value == 0)
{
if (GetBitFlag(MyByte, 4) == 0)
return MyByte; 
else 
{
MyByte = (MyByte ^ 0x10); 
return MyByte;
}
}
if (value == 1)
if (GetBitFlag(MyByte, 4) == 1)
return MyByte; 
else
{
MyByte = (MyByte ^ 0x10);
return MyByte;
}
break;
case 5:
if (value == 0)
{
if (GetBitFlag(MyByte, 5) == 0)
return MyByte;
else
{
MyByte = (MyByte ^ 0x20); // Need to set it to 0
return MyByte;
}
}
if (value == 1)
if (GetBitFlag(MyByte, 5) == 1)
return MyByte; 
else 
{
MyByte = (MyByte ^ 0x20);
return MyByte;
}
break;

case 6:
if (value == 0)
{
if (GetBitFlag(MyByte, 6) == 0)
return MyByte; 
else 
{
MyByte = (MyByte ^ 0x40); // Need to set it to 0
return MyByte;
}
}
if (value == 1)
if (GetBitFlag(MyByte, 6) == 1)
return MyByte; 
else 
{
MyByte = (MyByte ^ 0x40);
return MyByte;
}
break;

case 7:
if (value == 0)
{
if (GetBitFlag(MyByte, 7) == 0)
return MyByte;
else 
{
MyByte = (MyByte ^ 0x80); 
return MyByte;
}
}
if (value == 1)
if (GetBitFlag(MyByte, 7) == 1)
return MyByte; 
else 
{
MyByte = (MyByte ^ 0x80);
return MyByte;
}
break;

case 8:
if (value == 0)
{
if (GetBitFlag(MyByte, 8) == 0)
return MyByte; 
else 
{
MyByte = (MyByte ^ 0x01); 
return MyByte;
}
}
if (value == 1)
if (GetBitFlag(MyByte, 8) == 1)
return MyByte; 
else 
{
MyByte = (MyByte ^ 0x01);
return MyByte;
}
break;

default:
return MyByte;
break;

}

return MyByte;
}

long find(long index, unsigned char value)
{
if ( dictionary[index].p[value] == NULL )
return -1;
else 
return dictionary[index].p[value]->code;
}

void put_to_table(unsigned long index, unsigned char value)
{

if ( bit_writing == 1 && operation  == 0 )
{



if ( next_code == get_end_bit()-2   )
{
wb_bit_size++;
}
}

if ( bit_writing == 1 && operation == 1 )
{
if ( next_code == get_end_bit()-4  )
{
wb_bit_size++;
}
}

dictionary[index].p[value] = &dictionary[next_code];
dictionary[next_code].value = value;
dictionary[next_code].index = index;
dictionary[next_code].code = next_code;
next_code++;
}


int compress_file(char *input, char *output)
{
int do_reset = 0;
after_full_after_successes = 0;
full_after = 0;
operation = 0;

FILE *fp = fopen(input, "rb");
if (fp == NULL) {
return 0;
}

int rc;

if ( (rc = fgetc(fp)) == EOF )
{
return 0;
}
else
primo = rc;

ofstream outfile;
outfile.open(output,ios::out | ios::binary );

restart_compression:


while ( (secondo = getc(fp)) != EOF )
{


long find_result = -1;

find_result = find(primo, secondo);

if ( find_result == -1 )
{

if ( next_code < dictionary_size - 1 ) 
{
put_to_table(primo, secondo);
if ( bit_writing == 0 )
{
outfile.put(primo / 256);
outfile.put(primo % 256);
}
else
{
print_buffered(primo);
for ( int x = 0; x < wb_index; x++ )
outfile.put(write_buffer[x]);
}
primo = secondo;
}
else
{
    if (reset_dictionary == 1 )
    {
        if ( bit_writing == 1 )
        {
        do_reset = 1;
        }
    dic_reset();
    }

    if ( bit_writing == 0 )
    {
    outfile.put(primo / 256);
    outfile.put(primo % 256);
    }
    else
    {
    print_buffered(primo);
    for ( int x = 0; x < wb_index; x++ )
    outfile.put(write_buffer[x]);
    if ( do_reset == 1 )
    {
    print_buffered(dictionary_size);
    for ( int x = 0; x < wb_index; x++ )
    outfile.put(write_buffer[x]);
    wb_bit_size=9; 
    }
    }

    primo = secondo;
    goto restart_compression;
}
}
else
{
primo = find_result;
}

}

fclose(fp);

if ( bit_writing == 0 )
{
outfile.put(primo / 256);
outfile.put(primo % 256);
}
else
{
print_buffered(primo);
for ( int x = 0; x < wb_index; x++ )
outfile.put(write_buffer[x]);
}

if ( wb_rest != 0 )
{
print_buffered(0);
for ( int x = 0; x < wb_index; x++ )
outfile.put(write_buffer[x]);
}

outfile.close();


return 1;
}

void decoding_printf(unsigned long pcodice)
{

rb_index = 0;

if ( pcodice < 256 ){
reversing_buffer[rb_index] = pcodice;
rb_index++;
}
else
{
while (pcodice >= 256)
{

if ( full_after > 0 )
after_full_after_successes++;

reversing_buffer[rb_index] = dictionary[pcodice].value;
rb_index++;
pcodice = dictionary[pcodice].index;
}

reversing_buffer[rb_index] = pcodice;
rb_index++;

}

}


long decompress_file(char *lfile, char *ofilen)
{
full_after = 0;
after_full_after_successes = 0;
int print_last_flag = 0;
operation = 1;
unsigned char fc;

ofstream ofile;
ofile.open(ofilen,ios::out | ios::binary );

FILE *fp = fopen(lfile, "rb");
if (fp == NULL) {
printf("\nErrore nell'aprire il file %s", lfile);
  return 0;
}

int rch;
int rcl;


decompress_begin:

if ( bit_writing == 1 )
{
rch = fgetc(fp);
while ( (give_code(rch)) == -1  )
{
if ( (rch = fgetc(fp)) == EOF )
{
return 0;
}
}

primo = givecode;
}
else
{
if ( ((rch = fgetc(fp)) == EOF) || ((rcl = fgetc(fp)) == EOF) )
{
printf("\nIl file in ingresso e' vuoto!");
return 0;
}
else
primo = (rch*256)+rcl;

}

decoding_printf(primo);
for ( int i = rb_index -1; i >= 0; i-- )
ofile.put(reversing_buffer[i]);
fc = primo;
vecchio = fc;

while(( (rch = fgetc(fp)) != EOF)  )
{
unsigned int next_old = 0;

if ( bit_writing == 1 )
{

while ( (give_code(rch)) == -1  )
{
if ( (rch = fgetc(fp)) == EOF )
{
return 0;
}
}


if (reset_dictionary == 1 && bit_writing == 1 )
{


if ( givecode == dictionary_size  )
{
dic_reset();
wb_bit_size = 9;

goto decompress_begin;
if ( ( rch = fgetc(fp)) == EOF )
{
return 0;
}

while ( (give_code(rch)) == -1  )
{

if ( (rch = fgetc(fp)) == EOF )
{
return 0;
}

}
secondo = givecode;
}
else
secondo = givecode;
}
else
secondo = givecode;
}
else
{
rcl = fgetc(fp);
secondo = (rch*256)+rcl;
}


if ( secondo == next_code  )
{

print_last_flag = 0;
decoding_printf(primo);
for ( int i = rb_index -1; i >= 0; i-- )
{
ofile.put(reversing_buffer[i]);
}
fc = reversing_buffer[rb_index -1];


print_last_flag = 0;
decoding_printf(vecchio);


for ( int i = rb_index -1; i >= 0; i-- )
{
ofile.put(reversing_buffer[i]);
}

}
else
{


print_last_flag = 0;
decoding_printf(secondo);
for ( int i = rb_index -1; i >= 0; i-- )
{
ofile.put(reversing_buffer[i]);
}
fc = reversing_buffer[rb_index -1];
}

vecchio = fc;
put_to_table(primo, vecchio);

print_last_flag = 1;
primo = secondo;
}



return 0;
}


void dic_init()
{

for ( int i = 0; i < dictionary_size; i++)
{
for ( int z = 0; z < 256; z++)
dictionary[i].p[z] = NULL;
}

for ( int i = 0; i < 256; i++){
dictionary[i].index -1; 
dictionary[i].value = i;
dictionary[i].code = i;


}
next_code = 256;
wb_bit_size = 9;
wb_rest = 0;
wb_processed = 0;
}

void dic_reset()
{

for ( int i = 0; i < dictionary_size; i++)
{
for ( int z = 0; z < 256; z++)
dictionary[i].p[z] = NULL;



}

for ( int i = 0; i < 256; i++){
dictionary[i].index -1; 
dictionary[i].value = i;
dictionary[i].code = i;
}
 
next_code = 256;
}



int main(int argc, char *argv[])
{
    
dic_init();

if ( argc < 3 )
{
printf("\n--- LZW File compression utility ---\n\nUsage: \n\tuclzw -c source destination (file compress)\n\tuclzw -x source destination (file compress)\n\n");
return 0;
}

if ( argv[1][1] == 'c' )
{
printf("\nThe file: %s will be compressed and saved as: %s\n\n", argv[2], argv[3]);
compress_file(argv[2],argv[3]);
}   

if ( argv[1][1] == 'x' )
{
printf("\nThe file: %s will be extract and saved as: %s\n\n", argv[2], argv[3]);

decompress_file(argv[2],argv[3]);
}
    
return EXIT_SUCCESS;
}

